Loading libraries

## [1] "/Users/nasa_william_lu/DATA1901_Project_2"

Reading in files

# Load the cached house-price table for each suburb (one CSV per suburb).
# The Auburn file is intentionally left disabled.
l_parramatta_distance <- read.csv(file = "csv_cache/l_2150_parramatta_houseprice.csv")
l_merrylands_distance <- read.csv(file = "csv_cache/l_2160_merrylands_houseprice.csv")
l_eastwood_distance   <- read.csv(file = "csv_cache/l_2122_eastwood_houseprice.csv")
# l_auburn_distance   <- read.csv(file = "csv_cache/l_2144_auburn_houseprice.csv")
l_granville_distance  <- read.csv(file = "csv_cache/l_2142_granville_houseprice.csv")

# Add a rescaled copy of each table's station distance: the existing
# `distance_to_train_station` column is divided by 1000 and stored in a new
# column named "distance_to_train_station(km)".
# NOTE(review): dividing by 1000 implies the source column is in metres -- confirm.
l_parramatta_distance[["distance_to_train_station(km)"]] <-
  l_parramatta_distance$distance_to_train_station / 1000
# l_auburn_distance[["distance_to_train_station(km)"]] <-
#   l_auburn_distance$distance_to_train_station / 1000
l_merrylands_distance[["distance_to_train_station(km)"]] <-
  l_merrylands_distance$distance_to_train_station / 1000
l_eastwood_distance[["distance_to_train_station(km)"]] <-
  l_eastwood_distance$distance_to_train_station / 1000
l_granville_distance[["distance_to_train_station(km)"]] <-
  l_granville_distance$distance_to_train_station / 1000

# # Removing the first column of each `*_distance` data frame (`[, -1]` drops a column, not a row)
# l_parramatta_dist2 <- l_parramatta_distance[, -1]
# l_eastwood_dist2 <- l_eastwood_distance[, -1]
# l_merrylands_dist2 <-l_merrylands_distance[, -1]
# l_granville_dist2 <-l_granville_distance[, -1]
# # l_auburn_dist2 <-l_auburn_dist[, -1]

Classing distance

# Bin each suburb's kilometre distance into 0.25 km wide classes over (0, 4].
# cut() builds right-closed intervals by default, so the resulting labels are
# "(0,0.25]", "(0.25,0.5]", ..., "(3.75,4]".
#
# All tables share one break vector. The previous hand-typed vectors jumped
# from 2.5 straight to 3, which produced a single "(2.5,3]" class and left the
# later "(2.5,2.75]" and "(2.75,3]" filters with nothing to match.
# Distances of exactly 0, or above 4 km, fall outside every interval and
# become NA.
distance_breaks_km <- seq(0, 4, by = 0.25)

l_parramatta_distance$distance_class <- cut(
  l_parramatta_distance$"distance_to_train_station(km)",
  breaks = distance_breaks_km
)
l_merrylands_distance$distance_class <- cut(
  l_merrylands_distance$"distance_to_train_station(km)",
  breaks = distance_breaks_km
)
# l_auburn_distance$distance_class <- cut(
#   l_auburn_distance$"distance_to_train_station(km)",
#   breaks = distance_breaks_km
# )
l_eastwood_distance$distance_class <- cut(
  l_eastwood_distance$"distance_to_train_station(km)",
  breaks = distance_breaks_km
)
l_granville_distance$distance_class <- cut(
  l_granville_distance$"distance_to_train_station(km)",
  breaks = distance_breaks_km
)

Combining Data

# Stack the four suburb tables row-wise into a single data frame.
combined_df <- rbind(
  l_parramatta_distance,
  l_merrylands_distance,
  l_eastwood_distance,
  l_granville_distance
)

Filtering Data

# One subset of the combined table per bedroom count, 1 through 5.
combined_df_1bed <- filter(combined_df, bedroom == 1)
combined_df_2bed <- filter(combined_df, bedroom == 2)
combined_df_3bed <- filter(combined_df, bedroom == 3)
combined_df_4bed <- filter(combined_df, bedroom == 4)
combined_df_5bed <- filter(combined_df, bedroom == 5)
# Box plots of sold price (in units of $100,000) against distance class, two
# per bedroom count: one plain, one with boxes split by number of car spaces.
#
# The earlier `par(mfrow = c(1, 2))` call is gone: it configures base graphics
# only and has no effect on ggplot2 output. The ten near-identical ggplot()
# calls are folded into the helper below.

# Build one price-vs-distance box plot.
#   data             - subset of combined_df to draw
#   bedroom_label    - text appended to the title, e.g. "2 Bedrooms"
#   fill_by_carspace - when TRUE, boxes are filled by factor(carspace) and the
#                      legend is titled "Number of Carspaces"
# Returns the ggplot object; at top level it is printed automatically.
plot_price_by_distance <- function(data, bedroom_label,
                                   fill_by_carspace = FALSE) {
  box_layer <- if (fill_by_carspace) {
    geom_boxplot(
      aes(fill = factor(carspace)),
      outlier.colour = "blue", outlier.size = 1.5
    )
  } else {
    geom_boxplot(outlier.colour = "blue", outlier.size = 1.5)
  }

  price_plot <- ggplot(data, aes(x = distance_class, y = soldprice / 100000)) +
    box_layer +
    labs(
      title = paste("Sold Price vs Distance from Train Station for", bedroom_label),
      x = "Distance from Train Station(km)",
      y = "Selling Price (x$100000)"
    ) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.25)
    )

  if (fill_by_carspace) {
    # A fill legend title is only meaningful when a fill aesthetic is mapped.
    price_plot <- price_plot + labs(fill = "Number of Carspaces")
  }
  price_plot
}

plot_price_by_distance(combined_df_1bed, "1 Bedroom")

plot_price_by_distance(combined_df_1bed, "1 Bedroom", fill_by_carspace = TRUE)

plot_price_by_distance(combined_df_2bed, "2 Bedrooms")

summary(combined_df_2bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  185000  340000  412000  444110  535000 1180000
plot_price_by_distance(combined_df_2bed, "2 Bedrooms", fill_by_carspace = TRUE)

summary(combined_df_2bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  185000  340000  412000  444110  535000 1180000
plot_price_by_distance(combined_df_3bed, "3 Bedrooms")

# NOTE(review): the minimum of 575 below looks like a data-entry error
# (possibly 575000) -- confirm against the source CSV.
summary(combined_df_3bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     575  400000  520000  563484  665000 2020000
plot_price_by_distance(combined_df_3bed, "3 Bedrooms", fill_by_carspace = TRUE)

summary(combined_df_3bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     575  400000  520000  563484  665000 2020000
plot_price_by_distance(combined_df_4bed, "4 Bedrooms")

summary(combined_df_4bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  160000  510000  595000  687836  825000 1950000
plot_price_by_distance(combined_df_4bed, "4 Bedrooms", fill_by_carspace = TRUE)

summary(combined_df_4bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  160000  510000  595000  687836  825000 1950000
plot_price_by_distance(combined_df_5bed, "5 Bedrooms")

summary(combined_df_5bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  595000  850000 1320000 1274556 1628000 2090000
plot_price_by_distance(combined_df_5bed, "5 Bedrooms", fill_by_carspace = TRUE)

summary(combined_df_5bed$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  595000  850000 1320000 1274556 1628000 2090000

Filtering Data by Carspaces and Bedrooms

# Subsets of the combined table for each bedroom / carspace pairing that is
# plotted further down. Both conditions must hold for a row to be kept.

# 1 bedroom
combined_df_1bed_1car <- filter(combined_df, bedroom == 1, carspace == 1)

# 2 bedrooms
combined_df_2bed_1car <- filter(combined_df, bedroom == 2, carspace == 1)
combined_df_2bed_2car <- filter(combined_df, bedroom == 2, carspace == 2)

# 3 bedrooms
combined_df_3bed_1car <- filter(combined_df, bedroom == 3, carspace == 1)
combined_df_3bed_2car <- filter(combined_df, bedroom == 3, carspace == 2)
combined_df_3bed_3car <- filter(combined_df, bedroom == 3, carspace == 3)
combined_df_3bed_4car <- filter(combined_df, bedroom == 3, carspace == 4)

# 4 bedrooms
combined_df_4bed_1car <- filter(combined_df, bedroom == 4, carspace == 1)
combined_df_4bed_2car <- filter(combined_df, bedroom == 4, carspace == 2)
combined_df_4bed_3car <- filter(combined_df, bedroom == 4, carspace == 3)
combined_df_4bed_4car <- filter(combined_df, bedroom == 4, carspace == 4)

# 5 bedrooms
combined_df_5bed_1car <- filter(combined_df, bedroom == 5, carspace == 1)
combined_df_5bed_2car <- filter(combined_df, bedroom == 5, carspace == 2)
combined_df_5bed_3car <- filter(combined_df, bedroom == 5, carspace == 3)

1 bedroom

# Sold price (in units of $100,000) per distance class for 1-bedroom,
# 1-carspace sales, followed by a summary of the raw sold prices.
ggplot(
  data = combined_df_1bed_1car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 1 Bedroom and 1 Carspace",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_1bed_1car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  243000  334750  426500  426500  518250  610000

2 bedrooms

# Sold price (in units of $100,000) per distance class for 2-bedroom sales,
# one plot per carspace count, each followed by a summary of sold prices.

# 2 bedrooms, 1 carspace
ggplot(
  data = combined_df_2bed_1car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 2 Bedrooms and 1 Carspace",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_2bed_1car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  230000  337000  410000  440860  530000 1180000

# 2 bedrooms, 2 carspaces
ggplot(
  data = combined_df_2bed_2car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 2 Bedrooms and 2 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_2bed_2car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  225000  371000  467600  491252  588875 1120000

3 bedrooms

# Sold price (in units of $100,000) per distance class for 3-bedroom sales,
# one plot per carspace count, each followed by a summary of sold prices.

# 3 bedrooms, 1 carspace
ggplot(combined_df_3bed_1car, aes(x = distance_class, y = soldprice / 100000)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 1.5) +
  labs(
    title = "Sold Price vs Distance from Train Station for 3 Bedrooms and 1 Carspace",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_3bed_1car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  165000  390000  481575  529293  624500 2020000

# 3 bedrooms, 2 carspaces
ggplot(combined_df_3bed_2car, aes(x = distance_class, y = soldprice / 100000)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 1.5) +
  labs(
    title = "Sold Price vs Distance from Train Station for 3 Bedrooms and 2 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# NOTE(review): the minimum of 575 below looks like a data-entry error
# (possibly 575000) -- confirm against the source CSV.
summary(combined_df_3bed_2car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     575  418500  577500  593394  689500 1950000

# 3 bedrooms, 3 carspaces
ggplot(combined_df_3bed_3car, aes(x = distance_class, y = soldprice / 100000)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 1.5) +
  labs(
    title = "Sold Price vs Distance from Train Station for 3 Bedrooms and 3 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Summarise the 3-carspace subset that was just plotted; this call previously
# repeated the 2-carspace subset by mistake.
summary(combined_df_3bed_3car$soldprice)
# NOTE: the two output lines below are stale -- they were produced from
# combined_df_3bed_2car and must be regenerated by re-running this call.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     575  418500  577500  593394  689500 1950000

# 3 bedrooms, 4 carspaces
ggplot(combined_df_3bed_4car, aes(x = distance_class, y = soldprice / 100000)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 1.5) +
  labs(
    title = "Sold Price vs Distance from Train Station for 3 Bedrooms and 4 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_3bed_4car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  429500  483000  488000  479700  488000  510000

4 bedrooms

# Sold price (in units of $100,000) per distance class for 4-bedroom sales,
# one plot per carspace count, each followed by a summary of sold prices.

# 4 bedrooms, 1 carspace
ggplot(
  data = combined_df_4bed_1car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 4 Bedrooms and 1 Carspace",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_4bed_1car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  320000  450033  575000  638478  680000 1625000

# 4 bedrooms, 2 carspaces
ggplot(
  data = combined_df_4bed_2car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 4 Bedrooms and 2 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_4bed_2car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  160000  528000  595000  713046  868000 1950000

# 4 bedrooms, 3 carspaces
ggplot(
  data = combined_df_4bed_3car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 4 Bedrooms and 3 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_4bed_3car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  720000  731250  742500  742500  753750  765000

# 4 bedrooms, 4 carspaces
ggplot(
  data = combined_df_4bed_4car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 4 Bedrooms and 4 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_4bed_4car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  456000  580250  595000  629000  691250  800000

5 bedrooms

# Sold price (in units of $100,000) per distance class for 5-bedroom sales,
# one plot per carspace count, each followed by a summary of sold prices.
# The 1- and 3-carspace summaries printed no values when this was rendered.

# 5 bedrooms, 1 carspace
ggplot(
  data = combined_df_5bed_1car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 5 Bedrooms and 1 Carspace",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_5bed_1car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 

# 5 bedrooms, 2 carspaces
ggplot(
  data = combined_df_5bed_2car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 5 Bedrooms and 2 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_5bed_2car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  595000  850000 1320000 1274556 1628000 2090000

# 5 bedrooms, 3 carspaces
ggplot(
  data = combined_df_5bed_3car,
  mapping = aes(x = distance_class, y = soldprice / 100000)
) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price vs Distance from Train Station for 5 Bedrooms and 3 Carspaces",
    x = "Distance from Train Station(km)",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

summary(combined_df_5bed_3car$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 

Creating a column for Year

# Derive a factor column holding the four-digit year of each sale, taken from
# the `yearsold` column after parsing it with as.Date().
combined_df$Year <- as.factor(
  format(as.Date(combined_df$yearsold), "%Y")
)

# One subset per distance class (not per year). Each subset is named after the
# lower bound of its interval and is selected by matching the cut() label.
# NOTE: the "(2.5,2.75]" and "(2.75,3]" labels only exist when the breaks used
# to build distance_class include 2.75; otherwise those two subsets are empty.
combined_df_0.00 <- filter(combined_df, distance_class == "(0,0.25]")
combined_df_0.25 <- filter(combined_df, distance_class == "(0.25,0.5]")
combined_df_0.50 <- filter(combined_df, distance_class == "(0.5,0.75]")
combined_df_0.75 <- filter(combined_df, distance_class == "(0.75,1]")

combined_df_1.00 <- filter(combined_df, distance_class == "(1,1.25]")
combined_df_1.25 <- filter(combined_df, distance_class == "(1.25,1.5]")
combined_df_1.50 <- filter(combined_df, distance_class == "(1.5,1.75]")
combined_df_1.75 <- filter(combined_df, distance_class == "(1.75,2]")

combined_df_2.00 <- filter(combined_df, distance_class == "(2,2.25]")
combined_df_2.25 <- filter(combined_df, distance_class == "(2.25,2.5]")
combined_df_2.50 <- filter(combined_df, distance_class == "(2.5,2.75]")
combined_df_2.75 <- filter(combined_df, distance_class == "(2.75,3]")

combined_df_3.00 <- filter(combined_df, distance_class == "(3,3.25]")
combined_df_3.25 <- filter(combined_df, distance_class == "(3.25,3.5]")
combined_df_3.50 <- filter(combined_df, distance_class == "(3.5,3.75]")
combined_df_3.75 <- filter(combined_df, distance_class == "(3.75,4]")
# Yearly box plots of sold price (in units of $100,000), one per distance
# class, each followed by a summary of the raw sold prices in that class.
#
# The sixteen copy-pasted ggplot() calls are folded into one helper so the
# title text is built in a single place. The "(3.25,3.5]" plot was previously
# titled "3.25 to 3.75km"; it now reads "3.25 to 3.50km".

# Build one price-vs-year box plot.
#   data           - subset of combined_df for a single distance class
#   distance_range - text placed in the title, e.g. "0.25 to 0.50"
# Returns the ggplot object; at top level it is printed automatically.
plot_price_by_year <- function(data, distance_range) {
  ggplot(data, aes(x = Year, y = soldprice / 100000)) +
    geom_boxplot(outlier.colour = "blue", outlier.size = 1.5) +
    labs(
      title = paste0(
        "Sold Price vs year for townhouses ", distance_range,
        "km from train station"
      ),
      x = "Year",
      y = "Selling Price (x$100000)"
    ) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

plot_price_by_year(combined_df_0.00, "0 to 0.25")

summary(combined_df_0.00$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  339000  365000  370000  427333  502500  575000
plot_price_by_year(combined_df_0.25, "0.25 to 0.50")

summary(combined_df_0.25$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  165000  361250  518500  516118  596250 1545000
plot_price_by_year(combined_df_0.50, "0.50 to 0.75")

summary(combined_df_0.50$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  185000  410500  548000  576762  690000 2020000
plot_price_by_year(combined_df_0.75, "0.75 to 1.00")

summary(combined_df_0.75$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  240000  391000  510000  563944  676500 1950000
plot_price_by_year(combined_df_1.00, "1.00 to 1.25")

# NOTE(review): the minimum of 575 below looks like a data-entry error
# (possibly 575000) -- confirm against the source CSV.
summary(combined_df_1.00$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     575  379975  456000  538804  641550 1950000
plot_price_by_year(combined_df_1.25, "1.25 to 1.50")

summary(combined_df_1.25$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  129000  372500  446000  495032  585000 1500000
plot_price_by_year(combined_df_1.50, "1.50 to 1.75")

summary(combined_df_1.50$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  310000  432000  575000  600498  657475 2090000
plot_price_by_year(combined_df_1.75, "1.75 to 2.00")

summary(combined_df_1.75$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  307000  422500  575000  625769  762500 1515000
plot_price_by_year(combined_df_2.00, "2.00 to 2.25")

summary(combined_df_2.00$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  160000  400000  467500  486962  568750  770000
plot_price_by_year(combined_df_2.25, "2.25 to 2.50")

summary(combined_df_2.25$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  340000  382500  460000  508215  666250  775000
plot_price_by_year(combined_df_2.50, "2.50 to 2.75")

# NOTE: this summary and the next printed no values when rendered: the breaks
# used for distance_class had no 2.75 cut point, so neither subset matched
# any rows.
summary(combined_df_2.50$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 
plot_price_by_year(combined_df_2.75, "2.75 to 3.00")

summary(combined_df_2.75$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 
plot_price_by_year(combined_df_3.00, "3.00 to 3.25")

summary(combined_df_3.00$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  390000  395000  483000  514201  605000  698007
plot_price_by_year(combined_df_3.25, "3.25 to 3.50")

summary(combined_df_3.25$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  555000  555000  555000  555000  555000  555000
plot_price_by_year(combined_df_3.50, "3.50 to 3.75")

summary(combined_df_3.50$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  250000  300000  355000  389245  445750  664000
plot_price_by_year(combined_df_3.75, "3.75 to 4.00")

summary(combined_df_3.75$soldprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 
# Overview plots drawn from the full combined table. Prices are shown in
# units of $100,000 throughout.

# Every sale as a point per year, coloured by distance class.
# NOTE(review): the `fill` label is kept as written, but no fill aesthetic is
# mapped here, so it does not title the colour legend -- confirm the intent.
ggplot(data = combined_df, mapping = aes(x = Year, y = soldprice / 100000)) +
  geom_point(mapping = aes(color = distance_class)) +
  labs(
    title = "Sold Price over Years",
    x = "Year",
    y = "Selling Price (x$100000)",
    fill = "Number of Carspaces"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.25)
  )

# Price distribution per year (same unused `fill` label as above).
ggplot(data = combined_df, mapping = aes(x = Year, y = soldprice / 100000)) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price over Years",
    x = "Year",
    y = "Selling Price (x$100000)",
    fill = "Number of Carspaces"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.25)
  )

# Price distribution per bedroom count.
ggplot(data = combined_df, mapping = aes(x = factor(bedroom), y = soldprice / 100000)) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price for Different Numbers of Bedrooms",
    x = "Number of Bedrooms",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.25)
  )

# Price distribution per bathroom count.
ggplot(data = combined_df, mapping = aes(x = factor(bathroom), y = soldprice / 100000)) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price for Different Numbers of Bathrooms",
    x = "Number of Bathrooms",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.25)
  )

# Price distribution per carspace count.
ggplot(data = combined_df, mapping = aes(x = factor(carspace), y = soldprice / 100000)) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "blue") +
  labs(
    title = "Sold Price for Different Numbers of Carspaces",
    x = "Number of Carspaces",
    y = "Selling Price (x$100000)"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.25)
  )